###########################################################
# Kerice Doten-Snitker & Steve Pfaff
# 
# Witch trials and diffusion
# Periodized climate data, from CCSM 4.0
#
# Script based on scripts from N. Gauthier
# https://github.com/nick-gauthier/Risk-Resilience/blob/master/CCSM4%20Europe%20mh6k.Rmd
# https://github.com/nick-gauthier/Risk-Resilience/blob/master/climate-analysis.Rmd
# for https://doi.org/10.1016/j.quascirev.2017.09.015 
#
# Built under R 3.6.1
# Platform: x86_64-apple-darwin15.6.0 (64-bit)
###########################################################

# set locale to preempt UTF-8 issues with the mydata
# Sys.getlocale()
# for Mac
# Sys.setlocale(category = "LC_ALL", locale = "en_US.UTF-8")
# for windows
# Sys.setlocale(category = "LC_ALL", locale = "English_United States.1252")

# add packages with the pacman library, which installs if not installed
# library(pacman)
# use the here package for improving replicability
# p_load(here)
# for data manipulation
# p_load(magrittr, plyr, tidyverse, reshape2)
# for GIS functions
# p_load(ncdf4) # to import global climate model (GCM) data
# p_load(rgdal) # to read GCM data
# p_load(raster) # to process GCM data
# p_load(rasterVis) # to plot GCM data
# p_load(EMD) # to calculate trends in the data

# set.seed(1487)

###
# Observations are locales (cities + county centroids) 1300-1800 in Europe
###

#### Sample Locations ----------------------------------------------

# Make a table with one row for each trial location;
# will need non-numeric identifiers in raster extraction and reshaping
sites_net <- trials_net %>%
  filter(!is.na(coorde_google)) %>%
  dplyr::select(bhpr_runningnr, newID, lon = coorde_google, lat = coordn_google) %>% # only need identifiers and coords
  distinct(bhpr_runningnr, .keep_all = TRUE) #%>% # unique list of locations

# The CCSM 4.0 has an extent of x(-0.625, 359.375) and y(-90.4712, 90.4712). Longitude/x values start at the prime meridian. Because there are no negative values, but there are negative values (west of the prime meridian), in the data, we take some extra steps to convert the negative values to positive (essentially, subtract them from 360)
sites_net <- sites_net %>%
  mutate(lon = if_else(lon<(-0.625), # if lon is outside extent to the west
                       360+lon, # replace with 360 + lon, which will subtract
                       lon))# otherwise leave as is

# convert to SpatialPointsDataFrame so we can extract from the climate rasters at these locations
crs_WGS84 <- CRS(SRS_string = "EPSG:4326")
coordinates(sites_net) <- c("lon", "lat") #WGS84/EPSG:4236
projection(sites_net) <- crs_WGS84

#### CCSM 4.0 Import and Preprocessing ------------------------------

# CCSM 4.0 raster data used here were downloaded, compiled, and shared by Nick Gauthier.
# We use the 850-1850 climate model, 
# which includes monthly observations at a 1 degree frequency.
# Raw, annual rasters can be downloaded at http://www.cesm.ucar.edu/models/ccsm4.0/

# First, import data from the CCSM 4.0 paleoclimate simulation (WGS84 projection). Convert the precipitation values to mm/year and temperature values to degrees Celsius.  
# Economists generally use data from Guiot, Corona, & ESCARCEL (https://doi.org/10.1371/journal.pone.0009972), which is growing season temps (April-Sept). Limit to these months with time = YYYY-[05:10]-01. The YYYY-05-01 value is for the previous month, so 05:10 is in fact April to September.

# It would be computationally most expedient to crop each rasterbrick to the study area before doing any mathematical or data transformations. However, because some observations are west of the prime meridian, and the raster's x (longitude) extent is only positive, we cannot crop it. Subset the months we want, extract values, and then do summarizing and scale adjustments.

# total monthly precipitation
CCSM.prc <- brick(here::here("GIS_data", "Rasters", "b40.lm850-1850.1deg.001.cam2.h0.PRECT.085001-185012-005.nc")) %>%
  subset(grep("\\.(0[5-9]|10)\\.", names(.), value=FALSE)) # growing season

# mean monthly temp
CCSM.t <- brick(here::here("GIS_data", "Rasters","b40.lm850-1850.1deg.001.cam2.h0.TREFHT.085001-185012-003.nc")) %>%
  subset(grep("\\.(0[5-9]|10)\\.", names(.), value=FALSE)) # growing season

#### Extract values for city network sample ------------------------------

# Then extract temperature and precipitation values at each site by using **raster's** *extract* function to get the values from the climate model brick.

# Pull all the CCSM data into one data frame, with one row per year, and one column per variable/location combination. First *rbind* the two sets of CCSM data and *transpose* the results, turning the rows into columns. Add a column for the year.

CCSM.prc.ann.mat.net <- data.frame(sites_net$newID,
                               raster::extract(CCSM.prc, sites_net, layer=1)) %>%
  mutate(sites_net.newID = as.character(sites_net.newID)) %>%
  gather(key="time", value="prc", -sites_net.newID) %>% # one row per precip. value
  mutate(year = as.numeric(str_extract(time, "\\d{4}")), # extract year from date string
         prc = prc*2629743830) %>% # convert to mm
  group_by(sites_net.newID, year) %>%
  summarize(prc = mean(prc)) %>% # mean prc per site per year
  ungroup()

CCSM.t.ann.mat.net <- data.frame(sites_net$newID,
                             raster::extract(CCSM.t, sites_net, layer=1)) %>%
  mutate(sites_net.newID = as.character(sites_net.newID)) %>%
  gather(key="time", value="temp", -sites_net.newID) %>% # one row per precip. value
  mutate(year = as.numeric(str_extract(time, "\\d{4}")), # extract year from date string
         temp = temp-273.15) %>% # convert from kelvin to C
  group_by(sites_net.newID, year) %>%
  summarize(temp = mean(temp)) %>% # mean temp per site per year
  ungroup()

climate_net <- merge(CCSM.prc.ann.mat.net, CCSM.t.ann.mat.net,
                 by.x=c("year", "sites_net.newID"), by.y=c("year", "sites_net.newID"), 
                 all.x=TRUE, all.y=TRUE)

climate_net <- climate_net %>%
  mutate(newID = sites_net.newID) %>%
  dplyr::select(-sites_net.newID)

# saveRDS(climate_net, here::here("Witchtrial_diffusion", "out_data", "CCSM_climate_net_annual.rds"))
write_csv(climate_net, here::here("Witchtrial_diffusion", "out_data", "CCSM_climate_net_annual.csv"))
